--------------------------------------------------------------------------------------------------------------------------------------

Level 1

--------------------------------------------------------------------------------------------------------------------------------------

_________

Task 1 : Data Exploration and Preprocessing

_________

In [ ]:
# importing the libraries

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import folium

import warnings
# NOTE(review): 'ignore' silences every warning for the rest of the notebook,
# including pandas/seaborn deprecation warnings -- consider narrowing the filter.
warnings.filterwarnings('ignore')
In [ ]:
# Load the restaurant dataset from the CSV file (the file name contains a space)
retail = pd.read_csv(filepath_or_buffer='Dataset .csv')

# Show a preview of the loaded frame
retail
Out[ ]:
Restaurant ID Restaurant Name Country Code City Address Locality Locality Verbose Longitude Latitude Cuisines ... Currency Has Table booking Has Online delivery Is delivering now Switch to order menu Price range Aggregate rating Rating color Rating text Votes
0 6317637 Le Petit Souffle 162 Makati City Third Floor, Century City Mall, Kalayaan Avenu... Century City Mall, Poblacion, Makati City Century City Mall, Poblacion, Makati City, Mak... 121.027535 14.565443 French, Japanese, Desserts ... Botswana Pula(P) Yes No No No 3 4.8 Dark Green Excellent 314
1 6304287 Izakaya Kikufuji 162 Makati City Little Tokyo, 2277 Chino Roces Avenue, Legaspi... Little Tokyo, Legaspi Village, Makati City Little Tokyo, Legaspi Village, Makati City, Ma... 121.014101 14.553708 Japanese ... Botswana Pula(P) Yes No No No 3 4.5 Dark Green Excellent 591
2 6300002 Heat - Edsa Shangri-La 162 Mandaluyong City Edsa Shangri-La, 1 Garden Way, Ortigas, Mandal... Edsa Shangri-La, Ortigas, Mandaluyong City Edsa Shangri-La, Ortigas, Mandaluyong City, Ma... 121.056831 14.581404 Seafood, Asian, Filipino, Indian ... Botswana Pula(P) Yes No No No 4 4.4 Green Very Good 270
3 6318506 Ooma 162 Mandaluyong City Third Floor, Mega Fashion Hall, SM Megamall, O... SM Megamall, Ortigas, Mandaluyong City SM Megamall, Ortigas, Mandaluyong City, Mandal... 121.056475 14.585318 Japanese, Sushi ... Botswana Pula(P) No No No No 4 4.9 Dark Green Excellent 365
4 6314302 Sambo Kojin 162 Mandaluyong City Third Floor, Mega Atrium, SM Megamall, Ortigas... SM Megamall, Ortigas, Mandaluyong City SM Megamall, Ortigas, Mandaluyong City, Mandal... 121.057508 14.584450 Japanese, Korean ... Botswana Pula(P) Yes No No No 4 4.8 Dark Green Excellent 229
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
9546 5915730 Naml۱ Gurme 208 ��stanbul Kemanke�� Karamustafa Pa��a Mahallesi, R۱ht۱m ... Karak�_y Karak�_y, ��stanbul 28.977392 41.022793 Turkish ... Turkish Lira(TL) No No No No 3 4.1 Green Very Good 788
9547 5908749 Ceviz A��ac۱ 208 ��stanbul Ko��uyolu Mahallesi, Muhittin ��st�_nda�� Cadd... Ko��uyolu Ko��uyolu, ��stanbul 29.041297 41.009847 World Cuisine, Patisserie, Cafe ... Turkish Lira(TL) No No No No 3 4.2 Green Very Good 1034
9548 5915807 Huqqa 208 ��stanbul Kuru�_e��me Mahallesi, Muallim Naci Caddesi, N... Kuru�_e��me Kuru�_e��me, ��stanbul 29.034640 41.055817 Italian, World Cuisine ... Turkish Lira(TL) No No No No 4 3.7 Yellow Good 661
9549 5916112 A���k Kahve 208 ��stanbul Kuru�_e��me Mahallesi, Muallim Naci Caddesi, N... Kuru�_e��me Kuru�_e��me, ��stanbul 29.036019 41.057979 Restaurant Cafe ... Turkish Lira(TL) No No No No 4 4.0 Green Very Good 901
9550 5927402 Walter's Coffee Roastery 208 ��stanbul Cafea��a Mahallesi, Bademalt۱ Sokak, No 21/B, ... Moda Moda, ��stanbul 29.026016 40.984776 Cafe ... Turkish Lira(TL) No No No No 2 4.0 Green Very Good 591

9551 rows × 21 columns

In [ ]:
# Preview the first five rows of the dataset
retail.head(n=5)
Out[ ]:
Restaurant ID Restaurant Name Country Code City Address Locality Locality Verbose Longitude Latitude Cuisines ... Currency Has Table booking Has Online delivery Is delivering now Switch to order menu Price range Aggregate rating Rating color Rating text Votes
0 6317637 Le Petit Souffle 162 Makati City Third Floor, Century City Mall, Kalayaan Avenu... Century City Mall, Poblacion, Makati City Century City Mall, Poblacion, Makati City, Mak... 121.027535 14.565443 French, Japanese, Desserts ... Botswana Pula(P) Yes No No No 3 4.8 Dark Green Excellent 314
1 6304287 Izakaya Kikufuji 162 Makati City Little Tokyo, 2277 Chino Roces Avenue, Legaspi... Little Tokyo, Legaspi Village, Makati City Little Tokyo, Legaspi Village, Makati City, Ma... 121.014101 14.553708 Japanese ... Botswana Pula(P) Yes No No No 3 4.5 Dark Green Excellent 591
2 6300002 Heat - Edsa Shangri-La 162 Mandaluyong City Edsa Shangri-La, 1 Garden Way, Ortigas, Mandal... Edsa Shangri-La, Ortigas, Mandaluyong City Edsa Shangri-La, Ortigas, Mandaluyong City, Ma... 121.056831 14.581404 Seafood, Asian, Filipino, Indian ... Botswana Pula(P) Yes No No No 4 4.4 Green Very Good 270
3 6318506 Ooma 162 Mandaluyong City Third Floor, Mega Fashion Hall, SM Megamall, O... SM Megamall, Ortigas, Mandaluyong City SM Megamall, Ortigas, Mandaluyong City, Mandal... 121.056475 14.585318 Japanese, Sushi ... Botswana Pula(P) No No No No 4 4.9 Dark Green Excellent 365
4 6314302 Sambo Kojin 162 Mandaluyong City Third Floor, Mega Atrium, SM Megamall, Ortigas... SM Megamall, Ortigas, Mandaluyong City SM Megamall, Ortigas, Mandaluyong City, Mandal... 121.057508 14.584450 Japanese, Korean ... Botswana Pula(P) Yes No No No 4 4.8 Dark Green Excellent 229

5 rows × 21 columns

In [ ]:
# Inspect five randomly chosen rows of the dataset.
# A fixed random_state makes the same rows come back on every
# Restart Kernel -> Run All, so the displayed output stays reproducible.
retail.sample(5, random_state=42)
Out[ ]:
Restaurant ID Restaurant Name Country Code City Address Locality Locality Verbose Longitude Latitude Cuisines ... Currency Has Table booking Has Online delivery Is delivering now Switch to order menu Price range Aggregate rating Rating color Rating text Votes
6723 8771 Ashu Bhature Wala 1 New Delhi B-6/150, Sector 8, Rohini, New Delhi Rohini Rohini, New Delhi 77.125281 28.703973 Street Food ... Indian Rupees(Rs.) No No No No 1 3.3 Orange Average 79
2447 15705 Kitchen At 95 - Hyatt Regency 1 Ludhiana Hyatt Regency, Site 4, Ferozepur Road, Rajguru... Hyatt Regency, Rajguru Nagar Hyatt Regency, Rajguru Nagar, Ludhiana 75.786976 30.885814 Mediterranean, Chinese, Continental ... Indian Rupees(Rs.) No No No No 4 4.3 Green Very Good 87
6281 18350101 The Taste of Delhi 1 New Delhi Shop 2, 4, 6, & 7, CD Block, Sagar Complex, Pi... Pitampura Pitampura, New Delhi 77.137776 28.706518 North Indian ... Indian Rupees(Rs.) No Yes No No 2 3.3 Orange Average 128
5032 311067 Chatori Zubaan 2 1 New Delhi Near Nike Showroom, Rangpuri, NH-8, Mahipalpur... Mahipalpur Mahipalpur, New Delhi 77.116606 28.538537 North Indian, Chinese ... Indian Rupees(Rs.) No No No No 2 3.0 Orange Average 5
7234 311364 Believe in Taste 1 New Delhi G-19, BB Block, Vardhaman Complex, Shalimar Ba... Shalimar Bagh Shalimar Bagh, New Delhi 77.156470 28.715299 North Indian, Chinese, Fast Food ... Indian Rupees(Rs.) No No No No 1 3.0 Orange Average 7

5 rows × 21 columns

In [ ]:
# Dimensions of the dataset as a (number of rows, number of columns) tuple

retail.shape
Out[ ]:
(9551, 21)
In [ ]:
# Print a concise summary of the dataset: index range, per-column non-null
# counts and dtypes, and approximate memory usage

retail.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9551 entries, 0 to 9550
Data columns (total 21 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   Restaurant ID         9551 non-null   int64  
 1   Restaurant Name       9551 non-null   object 
 2   Country Code          9551 non-null   int64  
 3   City                  9551 non-null   object 
 4   Address               9551 non-null   object 
 5   Locality              9551 non-null   object 
 6   Locality Verbose      9551 non-null   object 
 7   Longitude             9551 non-null   float64
 8   Latitude              9551 non-null   float64
 9   Cuisines              9542 non-null   object 
 10  Average Cost for two  9551 non-null   int64  
 11  Currency              9551 non-null   object 
 12  Has Table booking     9551 non-null   object 
 13  Has Online delivery   9551 non-null   object 
 14  Is delivering now     9551 non-null   object 
 15  Switch to order menu  9551 non-null   object 
 16  Price range           9551 non-null   int64  
 17  Aggregate rating      9551 non-null   float64
 18  Rating color          9551 non-null   object 
 19  Rating text           9551 non-null   object 
 20  Votes                 9551 non-null   int64  
dtypes: float64(3), int64(5), object(13)
memory usage: 1.5+ MB
In [ ]:
# Number of distinct values in each column
retail.nunique(axis=0)
Out[ ]:
Restaurant ID           9551
Restaurant Name         7446
Country Code              15
City                     141
Address                 8918
Locality                1208
Locality Verbose        1265
Longitude               8120
Latitude                8677
Cuisines                1825
Average Cost for two     140
Currency                  12
Has Table booking          2
Has Online delivery        2
Is delivering now          2
Switch to order menu       1
Price range                4
Aggregate rating          33
Rating color               6
Rating text                6
Votes                   1012
dtype: int64
In [ ]:
# Count fully duplicated rows (every occurrence after the first is flagged)
retail.duplicated(keep='first').sum()
Out[ ]:
0
In [ ]:
# Count the missing values in each column
retail.isna().sum()
Out[ ]:
Restaurant ID           0
Restaurant Name         0
Country Code            0
City                    0
Address                 0
Locality                0
Locality Verbose        0
Longitude               0
Latitude                0
Cuisines                9
Average Cost for two    0
Currency                0
Has Table booking       0
Has Online delivery     0
Is delivering now       0
Switch to order menu    0
Price range             0
Aggregate rating        0
Rating color            0
Rating text             0
Votes                   0
dtype: int64

From this we can see that the "Cuisines" column has 9 missing values; every other column is complete.

In [ ]:
# Most frequent value of the 'Cuisines' column; mode() returns a Series,
# so take its first entry
mode_cuisines = retail['Cuisines'].mode().iloc[0]
mode_cuisines
Out[ ]:
'North Indian'
In [ ]:
# Replace missing values in the 'Cuisines' column with the mode.
# The result is assigned back to the column instead of calling
# fillna(..., inplace=True) on the selected column: that form is chained
# in-place assignment, which pandas 2.x deprecates and which no longer
# updates `retail` once Copy-on-Write is enabled.
retail['Cuisines'] = retail['Cuisines'].fillna(mode_cuisines)
In [ ]:
# Display the frame again after filling the missing 'Cuisines' values
retail
Out[ ]:
Restaurant ID Restaurant Name Country Code City Address Locality Locality Verbose Longitude Latitude Cuisines ... Currency Has Table booking Has Online delivery Is delivering now Switch to order menu Price range Aggregate rating Rating color Rating text Votes
0 6317637 Le Petit Souffle 162 Makati City Third Floor, Century City Mall, Kalayaan Avenu... Century City Mall, Poblacion, Makati City Century City Mall, Poblacion, Makati City, Mak... 121.027535 14.565443 French, Japanese, Desserts ... Botswana Pula(P) Yes No No No 3 4.8 Dark Green Excellent 314
1 6304287 Izakaya Kikufuji 162 Makati City Little Tokyo, 2277 Chino Roces Avenue, Legaspi... Little Tokyo, Legaspi Village, Makati City Little Tokyo, Legaspi Village, Makati City, Ma... 121.014101 14.553708 Japanese ... Botswana Pula(P) Yes No No No 3 4.5 Dark Green Excellent 591
2 6300002 Heat - Edsa Shangri-La 162 Mandaluyong City Edsa Shangri-La, 1 Garden Way, Ortigas, Mandal... Edsa Shangri-La, Ortigas, Mandaluyong City Edsa Shangri-La, Ortigas, Mandaluyong City, Ma... 121.056831 14.581404 Seafood, Asian, Filipino, Indian ... Botswana Pula(P) Yes No No No 4 4.4 Green Very Good 270
3 6318506 Ooma 162 Mandaluyong City Third Floor, Mega Fashion Hall, SM Megamall, O... SM Megamall, Ortigas, Mandaluyong City SM Megamall, Ortigas, Mandaluyong City, Mandal... 121.056475 14.585318 Japanese, Sushi ... Botswana Pula(P) No No No No 4 4.9 Dark Green Excellent 365
4 6314302 Sambo Kojin 162 Mandaluyong City Third Floor, Mega Atrium, SM Megamall, Ortigas... SM Megamall, Ortigas, Mandaluyong City SM Megamall, Ortigas, Mandaluyong City, Mandal... 121.057508 14.584450 Japanese, Korean ... Botswana Pula(P) Yes No No No 4 4.8 Dark Green Excellent 229
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
9546 5915730 Naml۱ Gurme 208 ��stanbul Kemanke�� Karamustafa Pa��a Mahallesi, R۱ht۱m ... Karak�_y Karak�_y, ��stanbul 28.977392 41.022793 Turkish ... Turkish Lira(TL) No No No No 3 4.1 Green Very Good 788
9547 5908749 Ceviz A��ac۱ 208 ��stanbul Ko��uyolu Mahallesi, Muhittin ��st�_nda�� Cadd... Ko��uyolu Ko��uyolu, ��stanbul 29.041297 41.009847 World Cuisine, Patisserie, Cafe ... Turkish Lira(TL) No No No No 3 4.2 Green Very Good 1034
9548 5915807 Huqqa 208 ��stanbul Kuru�_e��me Mahallesi, Muallim Naci Caddesi, N... Kuru�_e��me Kuru�_e��me, ��stanbul 29.034640 41.055817 Italian, World Cuisine ... Turkish Lira(TL) No No No No 4 3.7 Yellow Good 661
9549 5916112 A���k Kahve 208 ��stanbul Kuru�_e��me Mahallesi, Muallim Naci Caddesi, N... Kuru�_e��me Kuru�_e��me, ��stanbul 29.036019 41.057979 Restaurant Cafe ... Turkish Lira(TL) No No No No 4 4.0 Green Very Good 901
9550 5927402 Walter's Coffee Roastery 208 ��stanbul Cafea��a Mahallesi, Bademalt۱ Sokak, No 21/B, ... Moda Moda, ��stanbul 29.026016 40.984776 Cafe ... Turkish Lira(TL) No No No No 2 4.0 Green Very Good 591

9551 rows × 21 columns

In [ ]:
# Re-count missing values per column after imputing 'Cuisines'
retail.isna().sum()
Out[ ]:
Restaurant ID           0
Restaurant Name         0
Country Code            0
City                    0
Address                 0
Locality                0
Locality Verbose        0
Longitude               0
Latitude                0
Cuisines                0
Average Cost for two    0
Currency                0
Has Table booking       0
Has Online delivery     0
Is delivering now       0
Switch to order menu    0
Price range             0
Aggregate rating        0
Rating color            0
Rating text             0
Votes                   0
dtype: int64

Now all the missing values in the "Cuisines" column have been replaced with the most frequent value in that column.

In [ ]:
# Check the data type pandas inferred for each column
print(retail.dtypes)
Restaurant ID             int64
Restaurant Name          object
Country Code              int64
City                     object
Address                  object
Locality                 object
Locality Verbose         object
Longitude               float64
Latitude                float64
Cuisines                 object
Average Cost for two      int64
Currency                 object
Has Table booking        object
Has Online delivery      object
Is delivering now        object
Switch to order menu     object
Price range               int64
Aggregate rating        float64
Rating color             object
Rating text              object
Votes                     int64
dtype: object
In [ ]:
# Display summary statistics (count, mean, std, min, quartiles, max)
# to further inspect the 'Aggregate rating' column
print(retail['Aggregate rating'].describe())
count    9551.000000
mean        2.666370
std         1.516378
min         0.000000
25%         2.500000
50%         3.200000
75%         3.700000
max         4.900000
Name: Aggregate rating, dtype: float64
In [ ]:
# Histogram (10 bins) with a KDE overlay of the 'Aggregate rating' values
plt.figure(figsize=(10, 6))
sns.histplot(
    retail['Aggregate rating'],
    bins=10,
    kde=True,
).set(
    title='Distribution of Aggregate Rating',
    xlabel='Aggregate Rating',
    ylabel='Frequency',
)
plt.show()
No description has been provided for this image
In [ ]:
# Identify class imbalances: count the restaurants at each distinct rating
# value, then order the counts by rating rather than by frequency.
# NOTE(review): the 0.0 bucket is by far the largest -- presumably these are
# unrated restaurants; confirm against the 'Rating text' column.
rating_counts = retail['Aggregate rating'].value_counts()
rating_counts = rating_counts.sort_index()
print(rating_counts)
Aggregate rating
0.0    2148
1.8       1
1.9       2
2.0       7
2.1      15
2.2      27
2.3      47
2.4      87
2.5     110
2.6     191
2.7     250
2.8     315
2.9     381
3.0     468
3.1     519
3.2     522
3.3     483
3.4     498
3.5     480
3.6     458
3.7     427
3.8     400
3.9     335
4.0     266
4.1     274
4.2     221
4.3     174
4.4     144
4.5      95
4.6      78
4.7      42
4.8      25
4.9      61
Name: count, dtype: int64
In [ ]:
# Bar chart of the per-rating counts to make the class imbalance visible
plt.figure(figsize=(12, 8))
sns.barplot(
    x=rating_counts.index,
    y=rating_counts.values,
    palette='viridis',
).set(
    title='Class Distribution of Aggregate Rating',
    xlabel='Aggregate Rating',
    ylabel='Number of Occurrences',
)
# Rotate the tick labels so the rating values do not overlap
plt.xticks(rotation=90)
plt.show()
No description has been provided for this image
In [ ]:
 

_________

Task 2 : Descriptive Analysis

_________

In [ ]:
# Statistical summary (count, mean, std, min, quartiles, max) of the numeric columns

retail.describe()
Out[ ]:
Restaurant ID Country Code Longitude Latitude Average Cost for two Price range Aggregate rating Votes
count 9.551000e+03 9551.000000 9551.000000 9551.000000 9551.000000 9551.000000 9551.000000 9551.000000
mean 9.051128e+06 18.365616 64.126574 25.854381 1199.210763 1.804837 2.666370 156.909748
std 8.791521e+06 56.750546 41.467058 11.007935 16121.183073 0.905609 1.516378 430.169145
min 5.300000e+01 1.000000 -157.948486 -41.330428 0.000000 1.000000 0.000000 0.000000
25% 3.019625e+05 1.000000 77.081343 28.478713 250.000000 1.000000 2.500000 5.000000
50% 6.004089e+06 1.000000 77.191964 28.570469 400.000000 2.000000 3.200000 31.000000
75% 1.835229e+07 1.000000 77.282006 28.642758 700.000000 2.000000 3.700000 131.000000
max 1.850065e+07 216.000000 174.832089 55.976980 800000.000000 4.000000 4.900000 10934.000000
In [ ]:
# Function to plot the distribution of a categorical variable
def plot_categorical_distribution(column_name, dataset, top_n=None):
    """Draw a vertical bar chart of value counts for one column.

    Parameters
    ----------
    column_name : str
        Name of the column in ``dataset`` whose values are counted.
    dataset : pandas.DataFrame
        Frame that contains ``column_name``.
    top_n : int, optional
        When truthy, only the ``top_n`` most frequent values are plotted;
        otherwise every distinct value gets a bar.

    Returns
    -------
    None
        The figure is shown with ``plt.show()``.
    """
    plt.figure(figsize=(12, 8))

    # Count once, then optionally narrow to the most frequent categories
    counts = dataset[column_name].value_counts()
    if top_n:
        counts = counts.nlargest(top_n)

    sns.barplot(x=counts.index, y=counts.values, palette='viridis')

    plt.title(f'Distribution of {column_name}')
    plt.xlabel(column_name)
    plt.ylabel('Number of Occurrences')
    # Category labels can be long, so rotate them to avoid overlap
    plt.xticks(rotation=90)
    plt.show()
In [ ]:
# Explore the distribution of "Country Code" (all codes are shown)
plot_categorical_distribution(column_name='Country Code', dataset=retail)
No description has been provided for this image
In [ ]:
# Distribution of "City", restricted to the 20 most frequent cities so the
# chart stays readable
plot_categorical_distribution(column_name='City', dataset=retail, top_n=20)
No description has been provided for this image
In [ ]:
# Distribution of "Cuisines", restricted to the 20 most frequent values so the
# chart stays readable
plot_categorical_distribution(column_name='Cuisines', dataset=retail, top_n=20)
No description has been provided for this image
In [ ]:
# Function to plot top N categories
def plot_top_n_categories(column_name, dataset, n=10):
    """Draw a horizontal bar chart of the ``n`` most frequent values of a column.

    Parameters
    ----------
    column_name : str
        Name of the column in ``dataset`` whose values are counted.
    dataset : pandas.DataFrame
        Frame that contains ``column_name``.
    n : int, default 10
        Number of most frequent values to plot.

    Returns
    -------
    None
        The figure is shown with ``plt.show()``.
    """
    counts = dataset[column_name].value_counts().nlargest(n)
    bar_lengths, bar_labels = counts.values, counts.index

    plt.figure(figsize=(12, 8))
    # Counts go on the x axis so the category names read horizontally
    sns.barplot(x=bar_lengths, y=bar_labels, palette='viridis')
    plt.title(f'Top {n} {column_name}')
    plt.xlabel('Number of Restaurants')
    plt.ylabel(column_name)
    plt.show()

# Top 10 values of 'Cuisines'.
# Counts are per exact 'Cuisines' string, so a combination such as
# "North Indian, Chinese" is counted separately from "North Indian".
plot_top_n_categories(column_name='Cuisines', dataset=retail)

# Top 10 cities by number of restaurants
plot_top_n_categories(column_name='City', dataset=retail)
No description has been provided for this image
No description has been provided for this image
In [ ]:
 

_________

Task 3 : Geospatial Analysis

_________

In [ ]:
# Initialize the map centered on the first restaurant's location
map_center = [retail['Latitude'].iloc[0], retail['Longitude'].iloc[0]]
mymap = folium.Map(location=map_center, zoom_start=12)

# Add one marker per restaurant.
# Only three columns are needed, so iterate over them directly instead of
# using iterrows(), which builds a full Series for every row of the frame.
for lat, lon, name in zip(retail['Latitude'], retail['Longitude'], retail['Restaurant Name']):
    folium.Marker([lat, lon], popup=name).add_to(mymap)

# Save the map as an HTML file
mymap.save('restaurant_locations_map.html')

# Display the map
mymap
Out[ ]:
Make this Notebook Trusted to load map: File -> Trust Notebook
In [ ]:
# Number of restaurants in every city, most frequent first.
# The figure is large because every city in the dataset gets its own bar.
plt.figure(figsize=(24, 22))
city_counts = retail['City'].value_counts()
city_counts = city_counts.sort_values(ascending=False)
sns.barplot(
    x=city_counts.index,
    y=city_counts.values,
    palette='viridis',
)
plt.title('Number of Restaurants by City')
plt.xlabel('City')
plt.ylabel('Number of Restaurants')
# Rotate the city names so they do not overlap
plt.xticks(rotation=90)
plt.show()
No description has been provided for this image
In [ ]:
# Number of restaurants per country code (only when the column is present).
# NOTE(review): the codes are integers, so seaborn may order the bars by code
# rather than by the descending counts computed here -- check the rendered plot.
if 'Country Code' in retail.columns:
    plt.figure(figsize=(12, 6))
    country_counts = retail['Country Code'].value_counts()
    country_counts = country_counts.sort_values(ascending=False)
    sns.barplot(
        x=country_counts.index,
        y=country_counts.values,
        palette='viridis',
    )
    plt.title('Number of Restaurants by Country Code')
    plt.xlabel('Country Code')
    plt.ylabel('Number of Restaurants')
    plt.xticks(rotation=90)
    plt.show()
No description has been provided for this image
In [ ]:
# Scatter the restaurants by coordinates; both the colour and the marker size
# encode the aggregate rating
plt.figure(figsize=(10, 8))
sns.scatterplot(
    data=retail,
    x='Longitude',
    y='Latitude',
    hue='Aggregate rating',
    size='Aggregate rating',
    sizes=(20, 200),
    palette='viridis',
)
plt.title('Restaurant Locations and Ratings')
plt.xlabel('Longitude')
plt.ylabel('Latitude')
plt.legend(title='Aggregate Rating')
plt.show()
No description has been provided for this image
In [ ]:
# Pairwise Pearson correlation between the coordinates and the rating
correlation = retail.loc[:, ['Latitude', 'Longitude', 'Aggregate rating']].corr(method='pearson')
print("Correlation Matrix:")
print(correlation)
Correlation Matrix:
                  Latitude  Longitude  Aggregate rating
Latitude          1.000000   0.043207          0.000516
Longitude         0.043207   1.000000         -0.116818
Aggregate rating  0.000516  -0.116818          1.000000

The correlation matrix shows that latitude (r ≈ 0.0005) and longitude (r ≈ -0.117) have at most a weak linear relationship with the aggregate rating, so the geographical location of a restaurant does not have a significant linear impact on its rating.